import requests
from lxml import etree
import pandas as pd


def _first_text(node, path):
    """Return the first result of evaluating *path* on *node*, or None if empty."""
    matches = node.xpath(path)
    return matches[0] if matches else None


def get_movie_data(url):
    """Download a movie ranking page and extract one row per listed movie.

    Args:
        url: Address of the page to download.

    Returns:
        A list of ``[movie_name, score, number_of_viewers]`` rows. Each value
        is the first text node matched for that field, or ``None`` when the
        entry does not contain it. Returns ``None`` when the request fails or
        the server answers with a status code other than 200.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    }
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url=url, headers=headers, timeout=10)
    except requests.RequestException:
        # Report network-level failures the same way as a bad status code.
        print('请求失败')
        return None

    if response.status_code != 200:
        print('请求失败')
        return None
    response.encoding = 'utf-8'

    tree = etree.HTML(response.text)
    # NOTE: absolute, position-based path; it stops matching if the page
    # layout changes.
    items = tree.xpath("/html/body/div[3]/div[1]/div/div[1]/ol/li")

    data = []
    for item in items:
        # A missing field yields None instead of raising IndexError, so one
        # incomplete entry does not abort the whole scrape.
        data.append([
            _first_text(item, './/div/div[2]/div[1]/a/span[@class="title"]/text()'),
            _first_text(item, './/div[@class="info"]//span[@class="rating_num"]/text()'),
            _first_text(item, './/div[@class="info"]//span[contains(text(), "人评价")]/text()'),
        ])

    return data


def save_to_excel(data, filename):
    """Write the scraped rows to an Excel file.

    Args:
        data: Rows with three values each, in the order of the column
            headers defined below.
        filename: Path of the Excel file to write.
    """
    column_names = ['电影名称', '评分', '观看人数']
    frame = pd.DataFrame(data, columns=column_names)
    # index=False keeps the DataFrame row index out of the sheet.
    frame.to_excel(filename, index=False)


if __name__ == '__main__':
    # Scrape the Douban Top 250 page and export the rows to movies.xlsx.
    # Nothing is written when the scrape fails or yields no rows.
    top250_url = 'https://movie.douban.com/top250'
    rows = get_movie_data(top250_url)
    if rows:
        save_to_excel(rows, 'movies.xlsx')
        print('数据保存成功！')
